Data Preparation

# smacof analysis of 1996 wine data
# Lew Harvey
# 7 February 2016

library("smacof")
library("rgl")
library("psych")

rm(list = ls())

Read in the data and wine properties

fn <- "wine_96(11Ss).csv"
df <- read.csv(fn, header = TRUE)

# read in wine properties
df.prop <- read.csv("WineQuality2_96_Data.csv", header = TRUE)
df.prop$type <- c(rep("yellow", 5), rep("red", 5))

The data in the input file looks like this for the first subject:

head(df, 10)
##    wa wb wc wd we wf wg wh wi wj
## 1   0 NA NA NA NA NA NA NA NA NA
## 2   5  0 NA NA NA NA NA NA NA NA
## 3   5  6  0 NA NA NA NA NA NA NA
## 4   3  5  4  0 NA NA NA NA NA NA
## 5   4  4  7  2  0 NA NA NA NA NA
## 6   7  8  7  8  7  0 NA NA NA NA
## 7   5  4  6  4  8  6  0 NA NA NA
## 8   7  5  9  8  6  7  2  0 NA NA
## 9   4  4  8  8  6  4  6  7  0 NA
## 10  8  7  8  4  5  6  3  6  3  0

Convert the data into a list of matrices, one for each subject. This is a bit more complicated than it should be, but the original data are in an SPSS file that requires some massaging to get the data into a form for R

# loop through the data frame to
# create separate matrices for the
# data sets of each subject

nstim <- ncol(df)
nsets <- nrow(df) / ncol(df)
mlist <- list(NA)
mm <- list(NA)
colnames(df) <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
winename <- colnames(df)

for (i in 1:nsets){
    mm[i] <- paste("m", i, sep = "")
    i1 <- ((i - 1) * nstim) + 1
    i2 <- i1 + nstim - 1
    mlist[i] <- list(mm = as.matrix(df[i1:i2,]))
    row.names(mlist[[i]]) <- winename
    mlist[[i]] <- as.dist(mlist[[i]])
}
names(mlist) <- unlist(mm)

Now the matrices are in a list, one for each subject. Here are the first subject’s data as a distance matrix:

mlist[1]
## $m1
##   A B C D E F G H I
## B 5                
## C 5 6              
## D 3 5 4            
## E 4 4 7 2          
## F 7 8 7 8 7        
## G 5 4 6 4 8 6      
## H 7 5 9 8 6 7 2    
## I 4 4 8 8 6 4 6 7  
## J 8 7 8 4 5 6 3 6 3

2D INDSCAL Solution

wine2D <- smacofIndDiff(mlist, ndim = 2,
                        type = "ordinal",
                        constraint = "indscal",
                        itmax = 1000)
wine2D
## 
## Call: smacofIndDiff(delta = mlist, ndim = 2, type = "ordinal", constraint = "indscal", 
##     itmax = 1000)
## 
## Model: Three-way SMACOF 
## Number of objects: 10 
## Stress-1 value: 0.209 
## Number of iterations: 329

Plot 2D Solution and Fits

par(mfcol = c(2, 2))
par(pty = "s")
plot(wine2D, type = "p",
     plot.type = "confplot",
     label.conf = list(TRUE, 1, 1),
     xlim = c(-0.9, 0.9),
     ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")
par(pty = "m")

plot(wine2D, plot.type = "Shepard") 
plot(wine2D, plot.type = "stressplot", ylim = c(0, 25))
plot(wine2D, plot.type = "resplot")

par(mfcol = c(1, 1))

Configuration Plot without Color

par(pty = "s")
plot(wine2D, type = "p", pch = 19,
     plot.type = "confplot",
     label.conf = list(TRUE, 1, 1),
     xlim = c(-0.9, 0.9),
     ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")

par(pty = "m")

Configuration Plot with Color

par(pty = "s")
plot(wine2D, type = "p", pch = 19,
     col = df.prop$type,
     plot.type = "confplot",
     label.conf = list(TRUE, 1, 1),
     xlim = c(-0.9, 0.9),
     ylim = c(-0.9, 0.9))
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")

par(pty = "m")

Individual 2D Subject Spaces

Another plot we can make is the position of each stimulus in the individual subject space. These configurations are stored in the $conf attribute in the scaling results object.att (use the attributes(wine2D) command to see all the parts contained in the INDSCAL results object)

par(pty = "s")
ns <- length(wine2D$conf)
pltmax <- max(abs(unlist(wine2D$conf)))
plot(0, 0, type = "n",
     xlim = c(-pltmax, pltmax),
     ylim = c(-pltmax, pltmax))
# add the stimulus points from individual subject spaces
for (i in 1:ns){
    points(wine2D$conf[[i]][,"D1"], wine2D$conf[[i]][,"D2"],
           col = df.prop$type)}
abline(v = 0, col = "gray")
abline(h = 0, col = "gray")

par(pty = "m")

Subject Weights in 2D

Here we plot the weights of the individual subjects on each dimension. The code for extracting the weights from the results seems complicated but basically involves converting a list of matrices into a data frame based just on the diagonals of the matrices.

sswgts2D <- t(as.data.frame(lapply(wine2D$cweights, diag)))
par(pty = "s")
plot(D2 ~ D1, data = sswgts2D,
     xlim = c(0, 2),
     ylim = c(0, 2),
     main = "Subject weights in 2D")
abline(0, 1, col = "gray")

par(pty = "m")

3D solution

wine3D <- smacofIndDiff(mlist,
                        ndim = 3,
                        type = "ordinal",
                        constraint = "indscal",
                        verbose = FALSE,
                        itmax = 5000)
wine3D
## 
## Call: smacofIndDiff(delta = mlist, ndim = 3, type = "ordinal", constraint = "indscal", 
##     verbose = FALSE, itmax = 5000)
## 
## Model: Three-way SMACOF 
## Number of objects: 10 
## Stress-1 value: 0.152 
## Number of iterations: 1543

Plot the 3D solution without color

This plot cannot be printed since it is 3D and can be rotated by the user

plot3d(wine3D$gspace, type = "s",
       expand = 1.3,
       xlab = "X",
       ylab = "Y",
       zlab = "Z")
text3d(wine3D$gspace,
       text = rownames(wine3D$gspace),
       adj = c(2, 2))

Plot the 3D solution with color

This plot cannot be printed since it is 3D and can be rotated by the user

plot3d(wine3D$gspace, type = "s",
       expand = 1.3,
       col = df.prop$type,
       xlab = "Wine Color",
       ylab = "Y",
       zlab = "Z")
text3d(wine3D$gspace,
       text = rownames(wine3D$gspace),
       adj = c(2, 2))

Individual 3D Subject Spaces

Another plot we can make is the position of each stimulus in the individual subject space. These configurations are stored in the $conf attribute in the scaling results object.att (use the attributes(wine2D) command to see all the parts contained in the INDSCAL results object)

par(pty = "s")
ns <- length(wine3D$conf)
pltmax <- max(abs(unlist(wine3D$conf)))
plot3d(0, 0, type = "n",
       xlim = c(-pltmax, pltmax),
       ylim = c(-pltmax, pltmax),
       zlim = c(-pltmax, pltmax))
# add the stimulus points from individual subject spaces
for (i in 1:ns){
    plot3d(wine3D$conf[[i]],
           add = TRUE,
           radius = .04,
           type = "s",
           col = df.prop$type)
}
par(pty = "m")

Subject Weights in 3D

Here we plot the weights of the individual subjects on each dimension. The code for extracting the weights from the results seems complicated but basically involves converting a list of matrices into a data frame based just on the diagonals of the matrices.

sswgts3D <- t(as.data.frame(lapply(wine3D$cweights, diag)))
par(pty = "s")
plot3d(sswgts3D,
       xlim = c(0, 2),
       ylim = c(0, 2),
       zlim = c(0, 2),
       main = "Subject weights in 3D")
lines3d(xyz.coords(c(0, 2), c(0, 2), c(0, 2)), col = "gray")
par(pty = "m")

Properties of the wines

df.prop[1:10, 1:8]
##    Subject   Wine  Body Color Aroma Fruitiness Nuttiness Sweetness
## 1     Mean Wine A 4.000 3.250 3.625      8.000     2.571     6.714
## 2     Mean Wine B 3.571 3.143 3.429      9.143     1.286     8.714
## 3     Mean Wine C 1.250 2.857 3.750      9.250     1.250    11.333
## 4     Mean Wine D 3.429 3.125 4.625      5.750     3.000     5.500
## 5     Mean Wine E 2.857 3.714 3.714      7.000     2.286     6.286
## 6     Mean Wine F 6.000 7.143 5.125      4.571     3.857     3.286
## 7     Mean Wine G 6.571 7.000 5.750      5.125     5.143     3.625
## 8     Mean Wine H 7.000 8.000 5.123      2.875     3.500     2.250
## 9     Mean Wine I 6.286 8.000 5.714      4.571     4.571     4.429
## 10    Mean Wine J 7.375 7.750 5.000      4.500     4.500     3.625

Properties of wine vs 2D solution

qual <- names(df.prop)[3:15]
tmp <- biplotmds(wine2D, df.prop[, qual])
op <- par(pty = "s")
plot(tmp, col = df.prop$type,
     xlim = c(-2, 2),
     ylim = c(-2, 2))

par(op)
tmp
## 
## Call:
## lm(formula = ext ~ -1 + X)
## 
## Coefficients:
##     Body      Color     Aroma     Fruitiness  Nuttiness  Sweetness
## D1   1.56941   1.62388   1.48247  -1.62652     1.50177   -1.59136 
## D2   0.43480  -0.62716  -0.58834   0.39246     0.32249   -0.20236 
##     Tannic.Acid  Dryness   Complexity  Roundness  Spiciness  Quality 
## D1   1.56137      1.65025   1.50801     1.28171    1.26017    1.41432
## D2   0.02995      0.10981   0.22030     1.08774   -0.40636    0.87796
##     Openness
## D1   0.34505
## D2   0.08177

Properties of wine vs 3D solution

qual <- names(df.prop)[3:15]
tmp <- biplotmds(wine3D, df.prop[, qual])
tmpt <- t(coefficients(tmp))
tmpt
##                     D1          D2         D3
## Body         1.7301702 -0.21723772 -0.2241366
## Color        1.7305865 -0.92643591  0.3007068
## Aroma        1.5139030 -1.57146597 -0.9948987
## Fruitiness  -1.6294741  0.59845718  0.2962896
## Nuttiness    1.5752536 -0.58996641 -1.5928500
## Sweetness   -1.6408350  0.06828168  0.6379175
## Tannic.Acid  1.6757069 -0.73310109 -0.7947114
## Dryness      1.7374746 -0.12277573 -0.2621727
## Complexity   1.6724328 -0.51488529 -0.3554648
## Roundness    1.5132445  0.16797693 -0.8673027
## Spiciness    1.3965937 -1.50479412 -0.4003672
## Quality      1.5686664  1.08713842  0.1210472
## Openness     0.3812162  0.26891855  0.7111767
op <- par(pty = "s")
plot(tmp, col = df.prop$type,
     xlim = c(-2, 2),
     ylim = c(-2, 2))

par(op)
fa.pc <- fa.parallel(df.prop[, qual], fa = "pc")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## Parallel analysis suggests that the number of factors =  NA  and the number of components =  1
abline(h = 1)

pc.qual <- principal(df.prop[, qual], nfactors = 2, rotate = "varimax")
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
pc.qual
## Principal Components Analysis
## Call: principal(r = df.prop[, qual], nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##               PC1   PC2   h2    u2 com
## Body         0.96  0.16 0.94 0.058 1.1
## Color        0.95  0.02 0.91 0.092 1.0
## Aroma        0.90  0.04 0.82 0.183 1.0
## Fruitiness  -0.91 -0.20 0.86 0.138 1.1
## Nuttiness    0.93  0.10 0.88 0.117 1.0
## Sweetness   -0.89 -0.37 0.93 0.067 1.3
## Tannic.Acid  0.99  0.01 0.99 0.010 1.0
## Dryness      0.96  0.21 0.96 0.044 1.1
## Complexity   0.97  0.03 0.94 0.063 1.0
## Roundness    0.88  0.08 0.79 0.214 1.0
## Spiciness    0.90 -0.35 0.94 0.062 1.3
## Quality      0.78  0.51 0.86 0.141 1.7
## Openness     0.01  0.93 0.87 0.129 1.0
## 
##                         PC1  PC2
## SS loadings           10.17 1.51
## Proportion Var         0.78 0.12
## Cumulative Var         0.78 0.90
## Proportion Explained   0.87 0.13
## Cumulative Proportion  0.87 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.05 
##  with the empirical chi square  3.47  with prob <  1 
## 
## Fit based upon off diagonal values = 1